Exploring Size-at-age Changes

Loading Bio Data

# Access Biological Data with {gmRi}
# nmfs_bio <- gmRi::gmri_survdat_prep(survdat_source = "bio")
withr::with_dir(rprojroot::find_root('_targets.R'), 
                tar_load(survdat_biological))

# Core cleanup:
# Make 5 year increments
bio_5yr_prep <- function(survdat_biological){
  
  # Drop NA age data, restirct to summer and fall
  nmfs_bio <- survdat_biological %>% 
  filter(is.na(age) == FALSE,
         season %in% c("Summer", "Fall"))
  
  yr5_breaks <- seq(1970, 2020, by = 5)
  yr5_ends <- seq(1974, 2024, by = 5)
  yr5_labels <- str_c(yr5_breaks, "-", yr5_ends)
  yr5_labels <- yr5_labels[1:(length(yr5_labels)-1)]


  #Add 5-year groups, decade labels
  nmfs_bio <- nmfs_bio %>% 
    mutate(
      yearclass = est_year - (age-1),
        five_yr_group = cut(
          est_year, 
          breaks = yr5_breaks, 
          labels = yr5_labels,
          include.lowest = TRUE,
          ordered_result = TRUE),
        decade = floor_decade(est_year)) %>% 
    as.data.frame()
}


# run cleanup
nmfs_bio <- bio_5yr_prep(survdat_biological = survdat_biological)

Fitting Von-Bertalanffy Growth

Species Data Prep

In the original proposal it was stated that 17 species would be used, species that are regularly measured and aged. Ordering all species by the number aged we can pull out the following top species:

####  Data Prep  #### 

# Rank species by how many measurements there are
species_abunds <- nmfs_bio %>% 
  count(comname) %>% 
  arrange(desc(n)) # ordered by number measured

# Reorder alphabetically
vonbert_species <- sort(species_abunds$comname[1:17])

These will be the species that we assess length-at-age characteristics for: acadian redfish, american plaice, atlantic cod, atlantic herring, black sea bass, butterfish, goosefish, haddock, pollock, red hake, scup, silver hake, summer flounder, white hake, winter flounder, witch flounder and yellowtail flounder

We also only care about data from the two main survey seasons, so we limit the data to just those seasons before splitting it out into lists for each species. The lists contain all data for a given species, broken into groups at a set interval of time. In this case it is five year increments.

# Name the list so it carries through
names(vonbert_species) <- vonbert_species

# Does not work for because of bad start points or no age data:
# skates, dogfish, fourspot flounder, goosefish



##### Pull species and make group id for later
species_data <- map(vonbert_species, function(species_name){
  
  # Drop NA ages, set increment labels
  nmfs_bio %>%
    filter(comname == species_name) %>% 
    mutate(group_id = str_c(est_year, comname, sep = "-")) 
})

Age Data Availability

The availability of data for fitting growth curves varies by species & sex for the different seasons and years of the survey.

# check crosstable
#xtabs(~yearclass + age, bind_rows(species_data, .id = "common name"))



bind_rows(species_data, .id = "common name") %>% 
  count(comname, est_year, yearclass, age) %>% 
ggplot(aes(est_year, age, size = n, color = yearclass)) +
  geom_point() +
  scale_color_gmri(discrete = FALSE) +
  labs(x = "Year", y = "Age", size = "Number Aged", color = "Cohort Year") + 
  facet_wrap(~comname, ncol = 3) + 
  guides(color = guide_colorbar(title.position = "top", title.hjust = 0.5),
         size = guide_legend(title.position = "top", title.hjust = 0.5))

Fitting to 5-year periods

The Von-Bertalanffy parameterization used is the “Typical” von Bertalanffy parameterization, also known as the Beverton-Holt parameterization, which is implemented using the {FSA} package.

We can check that this is the case by displaying the equations:

typical <- growthFunShow(param = "Typical", "vonBertalanffy")
ggplot() +
  annotate(x = 1, y = 1, label = typical, geom = "text") +
  labs(title = '"Typical" Parameterization') + 
  theme_void()
bholt <- growthFunShow(param = "BevertonHolt", "vonBertalanffy")
ggplot() +
  annotate(x = 1, y = 1, label = bholt, geom = "text") +
  labs(title = '"Beverton-Holt" Parameterization') + 
  theme_void()

To start things off we first set the von Bert parameterization to solve using nonlinear least squares: nls()

( vb <- vbFuns(param="Typical") )
function (t, Linf, K = NULL, t0 = NULL) 
{
    if (length(Linf) == 3) {
        K <- Linf[[2]]
        t0 <- Linf[[3]]
        Linf <- Linf[[1]]
    }
    Linf * (1 - exp(-K * (t - t0)))
}
<bytecode: 0x7fc762eecbe0>
<environment: 0x7fc762eb7008>

Then once we have set that as the function we want to use, we can write a wrapper around it that will take input length and age data, some reasonable parameter starting points, and some minimum number of observations that we set to make sure groups with very few aged fish don’t break the workflow.

# Function to Pull Vonbert Coefficients
vonbert_coef <- function(length_age_dat, start_points, min_obs = 20){
  
  # Von Bert Fitting Using: {FSA}
  # Don't run on fewer than a minimum number of obervations
  num_aged <- nrow(length_age_dat)
  if(num_aged < min_obs){
    na_df <- data.frame("Linf" = NA, "K" = NA, "t0" = NA, "n_aged" = num_aged, 
                        "len_age_1" = NA, "len_age_2" = NA, "len_age_3" = NA)
    return(na_df)
  }
  
  # Use nls to estimate VBGF parameters using starting points
  vbert_fit <- nls(length_cm ~ vb(age, Linf, K, t0), 
                   data = length_age_dat, 
                   start = start_points)
  
  # Access parameters with coef()
  # Estimate sizes at age
  vbert_coef <- as.data.frame(t(coef(vbert_fit))) %>% 
    mutate(n_aged = num_aged,
           len_age_1 = Linf * (1  - exp(-1 * K * (1 - t0))),
           len_age_2 = Linf * (1  - exp(-1 * K * (2 - t0))),
           len_age_3 = Linf * (1  - exp(-1 * K * (3 - t0))))
  return(vbert_coef)
  
  
}

This next function is a little redundant, but makes it easier to use purrr::map and repeat the estimations for each species:

# Function for running that for a  single Species
species_vonbert <- function(comname, split_col, min_obs){
  
  # Get starting points from all data
  test_starts <-  vbStarts(length_cm ~ age, data = species_data[[comname]])
  
  # Get coefficients for groups
  species_data[[comname]] %>% 
    split(.[split_col]) %>% 
    map_dfr(vonbert_coef, test_starts, min_obs = min_obs, .id = split_col) %>% 
    mutate(comname = comname)
}

Once everything is prepped we can then fun the von Bert parameter estimation for all the species we are interested using the desired groups.

# Running everything?
species_coef <- vonbert_species[c(1:17)] %>% 
  map_dfr(species_vonbert, split_col =  "five_yr_group", min_obs = 30)

Plotting Coefficients

Since the table of coefficients is somewhat simple here is a function to select a species, the x_column to use (corresponding to the time grouping), and the name of the coefficient to plot.

# pick species and coefficients
plot_vonbert_coef <- function(comnames, x_col, coef_name){
  
  x_col_sym <- sym(x_col)
  coef_sym <- sym(coef_name)
  coef_label <- switch (
    coef_name,
    Linf = "Asymptotic Max Length (cm)",
    K = "Body Growth Coefficient (K)",
    t0 = "Hypothetical Age of Zero Size (t0)",
    n_aged = "Number Aged",
    len_age_1 = "Age 1 length (cm)",
    len_age_2 = "Age 2 length (cm)",
    len_age_3 = "Age 3 length (cm)")
  species_coef %>% 
    filter(comname %in% comnames) %>% 
    ggplot(aes(y = {{coef_sym}}, x = {{x_col_sym}})) +
    geom_line(group = 1, linetype = 3) +
    geom_point(size = 0.8) +
    scale_x_discrete(guide = guide_axis(n.dodge = 2)) +
    facet_wrap(~comname, ncol = 1, scales = "free") +
    labs(x = "Date Period", y = coef_label)
  
}

Using that function we can now take a look at the coefficients for any given species/coefficient combination.

L-infinity

plot_vonbert_coef(comnames = c("atlantic cod",  "haddock", "acadian redfish"), 
                  x_col = "five_yr_group", 
                  coef_name = "Linf")

K

plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"), 
                  x_col = "five_yr_group", 
                  coef_name = "K")

t0

plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"), 
                  x_col = "five_yr_group", 
                  coef_name = "t0")

Number Aged

plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"), 
                  x_col = "five_yr_group", 
                  coef_name = "n_aged")

Age 1 Size

plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"), 
                  x_col = "five_yr_group", 
                  coef_name = "len_age_1")

Age 2 Size

plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"), 
                  x_col = "five_yr_group", 
                  coef_name = "len_age_2")

Age 3 Size

plot_vonbert_coef(comnames = c("atlantic cod", "haddock", "acadian redfish"), 
                  x_col = "five_yr_group", 
                  coef_name = "len_age_3")

Tracking Cohorts

Bubble Plots

# Plotting age distribution
plot_age_bubbleplot <- function(species){
  age_bubble <- species_data[[species]] %>% 
    count(comname, est_year, age) %>% 
    ggplot(aes(x = est_year, y = age, size = n)) +
      geom_point(shape = 21, color = "white", fill = gmri_cols("gmri blue")) +
      facet_wrap(~comname) +
      labs(x = "Year", y = "Age", size = "Number Measured") +
      guides(size = guide_legend(title.position = "top", title.hjust = 0.5))
  
  size_bubble <- species_data[[species]] %>% 
    count(comname, est_year, age, length_cm) %>% 
    ggplot(aes(x = est_year, y = length_cm)) +
      geom_jitter(aes(color = age), height = 0, width = 0.3, alpha = 0.75) +
      scale_color_gmri(discrete = FALSE) +
      facet_wrap(~comname) +
      labs(x = "Year", y = "Length (cm)", color = "Age")   +
      guides(color = guide_colorbar(title.position = "top", title.hjust = 0.5))
  
  figs <- age_bubble / size_bubble
  return(figs)
  }


# Process Each species
bubble_plots <- map(.x = vonbert_species, .f = plot_age_bubbleplot)

Atlantic Cod

bubble_plots[["atlantic cod"]]

Haddock

bubble_plots[["haddock"]]

Acadian Redfish

bubble_plots[["acadian redfish"]]

Winter Flounder

bubble_plots[["winter flounder"]]

Haddock

bubble_plots[["haddock"]]

Ridgeplots

# Plotting age distribution as ggridges
plot_age_ridgeplot <- function(species){
  species_data[[species]] %>% 
    mutate(yr = factor(est_year),
           yr = fct_rev(yr)) %>% 
    ggplot(aes(x = age, y = yr)) +
      geom_density_ridges(fill = gmri_cols("gmri blue"), color = "white") +
      facet_wrap(~comname) +
      scale_x_continuous(limits = c(0,NA)) +
      labs(x = "Age", y = "Year") +
      guides(size = guide_legend(title.position = "top", title.hjust = 0.5))
  
}

# Process Each species
ridge_plots <- map(vonbert_species, plot_age_ridgeplot)

Atlantic Cod

ridge_plots[["atlantic cod"]]

Haddock

ridge_plots[["haddock"]]

Acadian Redfish

ridge_plots[["acadian redfish"]]

Winter Flounder

ridge_plots[["winter flounder"]]

Haddock

ridge_plots[["haddock"]]

Change in Size at Age

Export Size at Age and Coefficients

 

A work by Adam A. Kemberling

Akemberling@gmri.org